****************************************************************************************************
* File preparation: Intergenerational transmission of party affiliation within political families *
****************************************************************************************************

* Session setup: close any log left open, start from an empty session,
* switch off output paging and open the text log for this run.
capture log close
cls
clear all
clear matrix
set more off, permanently
log using "C:/Userdata/Shared/Logs/Parent politicians/fileprep.text", text replace

* scb is the root folder of the raw register files read below;
* the working directory receives every derived file saved by this script.
global scb "D:/SCB_ConPol/Stata"
cd "E:/ProjData/Parents politicians/"

********************************************************************************
// Birth registry
********************************************************************************

* Keep only persons whose LopNr occurs exactly once in the birth file.
* Every copy of a repeated LopNr is discarded, not just the surplus rows.
use "${scb}/RTB/Fodelseuppg.dta", clear
duplicates tag LopNr, generate(n_copies)
keep if n_copies == 0
drop n_copies
save "Fodelseuppg.dta", replace


********************************************************************************
// Multigenerational registry
********************************************************************************

* Store a compressed copy of the biological-parents file in the working directory.
use "${scb}/FlerGen/BioForaldrar.dta", clear
compress
save "BioForaldrar.dta", replace

********************************************************************************
// Municipality of residence 
********************************************************************************

* One slimmed-down RTB extract per election year: persons whose LopNr is unique
* in that year, with only LopNr and Kommun kept. The flag variable created by
* duplicates tag is removed by the keep statement.
local election_years 1982 1985 1988 1991 1994 1998 2002 2006 2010 2014
foreach yr of local election_years {
	use "${scb}/RTB/RTB_`yr'.dta", clear
	duplicates tag LopNr, generate(n_copies)
	keep if n_copies == 0
	keep LopNr Kommun
	save "rtb_`yr'.dta", replace
}

********************************************************************************
// Politician data sets
********************************************************************************

* KO nominations, pooled raw file covering 1982, 1985, 1988 and 1994.
use "${scb}/NomVald/Nominerade_KO_82_85_88_94.dta", clear
* Two observations hold the invalid value "0-" in the register data; blank them
* so that nrinom can be converted to a numeric variable.
replace nrinom = "" if nrinom == "0-"
destring nrinom, replace
rename valar year
keep LopNr year partikod nrinom valtyp
save "nom_KO_82_85_88_94.dta", replace

* KO nominations, one raw file per remaining election year.
* The capture prefix lets the same lines run on files where valtyp already
* exists or where nrinom or listnr are absent.
foreach yr in 1991 1998 2002 2006 2010 2014 {
	use "${scb}/NomVald/nom_`yr'_KO.dta", clear
	capture generate valtyp = "KO"
	capture destring nrinom, replace
	capture destring listnr, replace
	* The 1991 extract is kept without nrinom and listnr.
	local wanted LopNr partikod valtyp
	if `yr' != 1991 {
		local wanted LopNr partikod nrinom listnr valtyp
	}
	keep `wanted'
	generate year = `yr'
	save "nom_`yr'_KO.dta", replace
}

* RI nominations 1982-1988: strip literal double quotes from listnr, convert
* listnr and nrinom to numeric, and harmonise the variable names.
use "${scb}/NomVald/Nominerade_RI_82_85_88.dta", clear
replace listnr = subinstr(listnr, `"""', "", .)
destring listnr nrinom, replace
rename (valar Partikod) (year partikod)
keep LopNr year partikod nrinom listnr valtyp
save "nom_RI_82_85_88.dta", replace

* LT nominations 1982-1988: only the variable names need harmonising here.
* NOTE(review): no destring is applied in this file, so nrinom and listnr are
* presumably already numeric - confirm, since the later append needs matching types.
use "${scb}/NomVald/Nominerade_LT_82_85_88.dta", clear
rename (valar Partikod) (year partikod)
keep LopNr year partikod nrinom listnr valtyp
save "nom_LT_82_85_88.dta", replace


* LT and RI nominations, one raw file per election type and year.
* Files before 1998 are kept without nrinom and listnr; from 1998 onwards
* both are retained.
foreach body in LT RI {
	foreach yr in 1991 1994 1998 2002 2006 2010 2014 {
		use "${scb}/NomVald/nom_`yr'_`body'.dta", clear
		capture generate valtyp = "`body'"
		capture destring nrinom, replace
		capture destring listnr, replace
		local wanted LopNr partikod valtyp
		if `yr' >= 1998 {
			local wanted LopNr partikod nrinom listnr valtyp
		}
		keep `wanted'
		generate year = `yr'
		save "nom_`yr'_`body'.dta", replace
	}
}


* Stack all nomination files into one long data set.
use "nom_KO_82_85_88_94.dta", clear
foreach pooled in "nom_LT_82_85_88.dta" "nom_RI_82_85_88.dta" {
	append using "`pooled'"
}

* Yearly files. KO 1994 has no yearly file of its own (it is part of the pooled
* KO file). The RI and LT files for 1991 are skipped as well.
* NOTE(review): the reason for leaving out RI/LT 1991 is not stated - confirm.
foreach body in KO LT RI {
	foreach yr in 1991 1994 1998 2002 2006 2010 2014 {
		local skip = inlist("`body'`yr'", "KO1994", "RI1991", "LT1991")
		if `skip' == 0 {
			append using "nom_`yr'_`body'.dta"
		}
	}
}

***************************************************************************************
// Calculate probability to run for each party for people living in the same muni-year*
***************************************************************************************

* Work on a copy of the stacked nominations; the full data are restored at the end.
preserve

* Attach Kommun from the RTB extract of the election year in which the person ran.
generate kommunkod = .
levelsof year, local(elec_years)
foreach yr of local elec_years {
	merge m:1 LopNr using rtb_`yr', nogenerate keep(master match) keepusing(Kommun)
	replace kommunkod = Kommun if year == `yr' & kommunkod == .
	drop Kommun
}
drop if kommunkod == . | year == . | partikod == .

* Party is the most common party code of the person over all remaining rows,
* then one row per person, municipality and year.
bysort LopNr: egen party = mode(partikod)
collapse (firstnm) party, by(kommunkod year LopNr)
drop if party == .

* One 0/1 indicator per party code; the municipality-year mean of each indicator
* is the share of candidates with that party.
local suffixes m kd fp c mp s v sd
local codes 1 68 3 4 55 2 5 110
forvalues i = 1/8 {
	local sfx : word `i' of `suffixes'
	local code : word `i' of `codes'
	generate partyshare_`sfx' = (party == `code')
}
collapse (mean) partyshare_*, by(kommunkod year)
save partyshares, replace
restore
		
* Collapse to one row per person (LopNr):
*   party, modeyear, modevaltyp : most common party code, election year and election type
*   nrinom                      : lowest nrinom over all rows of the person
*   firstyear                   : earliest election year of the person
*   tophalf                     : 1 if nrinom was ever below the median nrinom of its listnr group
bys LopNr: egen party = mode(partikod)
bys LopNr: egen modeyear = mode(year)
bys LopNr: egen modevaltyp = mode(valtyp)
* NOTE(review): the median is taken within listnr only, pooling all years and election
* types, and all rows with missing listnr form one single group - confirm this is intended.
bys listnr: egen mediannr = median(nrinom)
* A relational expression is never missing in Stata, so cond(nrinom<mediannr,1,0,.)
* could never return its fourth argument: rows without nrinom were silently coded 0.
* Restricting the assignment keeps tophalf missing whenever the comparison is undefined,
* and the max below then ignores those rows.
gen tophalf = (nrinom < mediannr) if !missing(nrinom, mediannr)
collapse (firstnm) party modeyear modevaltyp (min) nrinom firstyear=year (max) tophalf, by(LopNr)

* Attach the party shares of the municipality-year of the first candidacy.
* Step 1: Kommun from the RTB extract that matches the year of first candidacy.
generate kommunkod = .
levelsof firstyear, local(entry_years)
foreach yr of local entry_years {
	merge m:1 LopNr using rtb_`yr', nogenerate keep(master match) keepusing(Kommun)
	replace kommunkod = Kommun if firstyear == `yr' & kommunkod == .
	drop Kommun
}

* Step 2: merge the shares on municipality and year; unmatched persons are kept.
generate year = firstyear
merge m:1 kommunkod year using partyshares, nogenerate keep(master match)
save "politicians.dta", replace

* Remove the intermediate nomination files from the working directory.
foreach pooled in "nom_KO_82_85_88_94.dta" "nom_LT_82_85_88.dta" "nom_RI_82_85_88.dta" {
	erase "`pooled'"
}

* Yearly files: every election type and year except KO 1994, for which no
* yearly file was written.
foreach body in KO LT RI {
	foreach yr in 1991 1994 1998 2002 2006 2010 2014 {
		if "`body'`yr'" != "KO1994" {
			erase "nom_`yr'_`body'.dta"
		}
	}
}

* Erase the temporary RTB files.
* The year list has to match the years for which rtb extracts were written earlier
* in this script; 1994 was missing here, which left rtb_1994.dta behind.
foreach y in 1982 1985 1988 1991 1994 1998 2002 2006 2010 2014 {
	erase "rtb_`y'.dta"
}

********************************************************************************
// Panel over municipalities
********************************************************************************



* Clean the yearly SAMS files: derive one code (muni) per LopNr-AterPnr-SenPnr
* combination and save one file per election year.
foreach y in 1982 1985 1988 1991 1994 1998 2002 2006 2010 2014 {
	use "${scb}/SAMS/SAMS_`y'.dta", clear
	gen year = `y'
	* llkk is the numeric value of the first four characters of SAMS
	* (presumably the municipality code - confirm); 9999 is recoded to missing.
	gen llkk = real(substr(SAMS,1,4))
	replace llkk = . if llkk == 9999
	duplicates tag LopNr AterPnr SenPnr, gen(dup)
	su dup
	* Remove duplicates: only needed when at least one combination occurs more than once
	if r(max) > 0 {
		* Duplicated combinations get the most common llkk, unique ones their own llkk
		egen muni = mode(llkk) if dup > 0, by(LopNr AterPnr SenPnr)
		replace muni = llkk if dup == 0
		* NOTE(review): muni is already constant within each combination at this point,
		* so its sd is 0 or missing and the replace below never changes anything.
		* If the aim was to blank combinations whose duplicates disagree, the sd
		* should probably be taken over llkk instead - confirm.
		collapse (firstnm) muni year (sd) sd=muni, by(LopNr AterPnr SenPnr)
		replace muni = . if sd > 0 & sd != .
		drop sd
	}
	else {
		* NOTE(review): this branch keeps every original variable plus llkk and dup,
		* whereas the collapse branch keeps only the by-variables, muni and year.
		gen muni = llkk
	}
	save "sams_`y'.dta", replace
}

* Stack the cleaned yearly files into one panel, starting from 1982.
local later_years 1985 1988 1991 1994 1998 2002 2006 2010 2014
use "sams_1982.dta"
foreach yr of local later_years {
	append using "sams_`yr'.dta"
}
save "munipanel.dta", replace

* The yearly files are no longer needed once the panel is saved.
foreach yr in 1982 `later_years' {
	erase "sams_`yr'.dta"
}


********************************************************************************
// Number of politicians: Number used in text. 
********************************************************************************

* For each 2014 file of elected persons (RI, KO, LT) save one row per LopNr with
* a flag equal to 1. The flag name is also the name of the saved file.
local bodies RI KO LT
local flags riksdagen ko14 lt14
forvalues i = 1/3 {
	local body : word `i' of `bodies'
	local flag : word `i' of `flags'
	use "${scb}/NomVald/Valda_2014_`body'.dta", clear
	generate `flag' = 1
	collapse (firstnm) `flag', by(LopNr)
	save "`flag'.dta", replace
}

********************************************************************************
// SAMS panel for the "sociological pathway"
********************************************************************************

* Yearly SAMS extracts for 1982-2014: LopNr and SAMS for persons whose LopNr is
* unique in that year, saved to the working directory.
forvalues yr = 1982/2014 {
	use "${scb}/SAMS/SAMS_`yr'", clear
	keep LopNr SAMS
	duplicates tag LopNr, generate(n_copies)
	keep if n_copies == 0
	drop n_copies
	generate year = `yr'
	save "SAMS_`yr'", replace
}

* Stack the yearly extracts into one long panel.
use "SAMS_1982", clear
forvalues yr = 1983/2014 {
	append using "SAMS_`yr'"
}
save "samspanel.dta", replace


********************************************************************************
// Income data: The materialistic pathway  
********************************************************************************

* Yearly LISA extracts for 1990-2014: LopNr and LoneInk for persons whose LopNr
* is unique in that year.
forvalues yr = 1990/2014 {
	use "${scb}/LISA/LISA_`yr'.dta", clear
	keep LopNr LoneInk
	duplicates tag LopNr, generate(n_copies)
	keep if n_copies == 0
	drop n_copies
	generate year = `yr'
	save "Income_`yr'.dta", replace
}

* Stack the yearly extracts into one long income panel.
use "Income_1990.dta", clear
forvalues yr = 1991/2014 {
	append using "Income_`yr'"
}
compress
save "Income_panel.dta", replace

* Income at age 33 (or the closest available age of at least 30), standardized.
* Starts from the income panel that is still in memory after the save above.
merge m:1 LopNr using "Fodelseuppg.dta", nogen keep(1 3)
keep LopNr LoneInk year FodArMan
* Birth year is the first four characters of FodArMan (presumably a year-month value - confirm)
tostring FodArMan, replace
gen birthyear = substr(FodArMan,1,4) 
destring birthyear, replace 

* Mean and standard deviation of LoneInk within each birth-cohort by calendar-year cell
bys birthyear year: egen inkmean = mean(LoneInk)
bys birthyear year: egen inksd = sd(LoneInk)
gen zink = (LoneInk - inkmean) / inksd											// z-score of income within each cohort-year cell
gquantiles incpc = LoneInk, nquantiles(100) xtile by(birthyear year)			// Percentile rank within each cohort-year cell (requires gtools)
drop inkmean inksd

* Ages below 30 are dropped only after the standardization, so they still
* contribute to the cell means, standard deviations and percentiles above.
gen age = year - birthyear
drop if age < 30																
drop if zink == . | age == . | incpc == .
gen dist = abs(age-33)
bys LopNr: egen mindist = min(dist)												// First choice: those who are 33. Second choice: smallest distance from 33, but at least 30
drop if dist > mindist
* If two ages are equally close (for example 32 and 34), collapse averages the two rows
collapse zink incpc, by(LopNr)
* After the collapse only these three variables remain, so this keep changes nothing
keep LopNr zink incpc
save "Income_panel33.dta", replace


***********************
* Close the log and empty the session
capture log close
clear all
